suppressMessages(library(Seurat))
suppressMessages(library(ggplot2))
suppressMessages(library(ggrepel))
suppressMessages(library(msigdbr))
suppressMessages(library(fgsea))
suppressMessages(library(dplyr))
# This DEA was performed using scStudio: https://compbio.imm.medicina.ulisboa.pt/app/scStudio 
# scStudio employs the FindMarkers function available from the Seurat package
# Default parameters were used except for: test.use = "MAST" and logfc.threshold = -Inf 
dea <- readRDS("/mnt/nmorais-nfs/marta/shiny/scstudio/tokens/karine/dea.rds")
markers_c3 <- dea$r0.4_3vsOthers_MAST
markers_c3$neutral_rank <- abs(markers_c3$`Log2FC (mean)`)
markers_c3 <- markers_c3[order(markers_c3$neutral_rank, decreasing = TRUE),]
markers_c3
markers_c1 <- dea$r0.4_1vsOthers_MAST
markers_c1$neutral_rank <- abs(markers_c1$`Log2FC (mean)`)
markers_c1 <- markers_c1[order(markers_c1$neutral_rank, decreasing = TRUE),]
markers_c1
rank_c3 <- as.data.frame(cbind(markers_c3$Gene, markers_c3$neutral_rank))

colnames(rank_c3) <- c("genes", "log2FC")
  
nrank_c3 <- as.numeric(as.character(rank_c3[,2]))

names(nrank_c3) <- rank_c3$genes
  
get_categories <-  msigdbr_collections()
  
hallmarks <-  msigdbr(species = "Mus musculus", category = "H")

hallmarks <- hallmarks %>% split(x = .$gene_symbol, f = .$gs_name)
  
gsea_result_c3 <- fgsea(pathways = hallmarks, eps = 0, stats = nrank_c3)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (86.96% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
results_tidy_c3 <- gsea_result_c3 %>% as_tibble() %>% arrange(desc(NES))
  
results_tidy_c3
rank_c1 <- as.data.frame(cbind(markers_c1$Gene, markers_c1$neutral_rank))

colnames(rank_c1) <- c("genes", "log2FC")
  
nrank_c1 <- as.numeric(as.character(rank_c1[,2]))

names(nrank_c1) <- rank_c1$genes
  
get_categories <-  msigdbr_collections()
  
hallmarks <-  msigdbr(species = "Mus musculus", category = "H")

hallmarks <- hallmarks %>% split(x = .$gene_symbol, f = .$gs_name)
  
gsea_result_c1 <- fgsea(pathways = hallmarks, eps = 0, stats = nrank_c1)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (81.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
results_tidy_c1 <- gsea_result_c1 %>% as_tibble() %>% arrange(desc(NES))
  
results_tidy_c1
plotEnrichment(hallmarks[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]],
                      nrank_c3 ) + labs(title = "C3 - HALLMARK_INTERFERON_GAMMA_RESPONSE")

plotEnrichment(hallmarks[["HALLMARK_INTERFERON_GAMMA_RESPONSE"]],
                      nrank_c1 ) + labs(title = "C1 - HALLMARK_INTERFERON_GAMMA_RESPONSE")

leading_edge_c3 <- results_tidy_c3[results_tidy_c3$pathway == "HALLMARK_INTERFERON_GAMMA_RESPONSE",]$leadingEdge[[1]]
leading_edge_c3
##   [1] "Ccl5"     "Cfb"      "H2-Q7"    "Ifitm2"   "Bst2"     "Ptgs2"   
##   [7] "Rsad2"    "Cd274"    "Sod2"     "Oasl1"    "B2m"      "Cd38"    
##  [13] "Isg20"    "Slamf7"   "Tnfaip3"  "Xaf1"     "Lgals3bp" "Il18bp"  
##  [19] "Herc6"    "Arid5b"   "Hif1a"    "H2-D1"    "Il15ra"   "Irf7"    
##  [25] "Rnf213"   "Ddx60"    "Cd40"     "Vcam1"    "Peli1"    "Il10ra"  
##  [31] "Stat1"    "H2-Aa"    "Icam1"    "Rtp4"     "H2-Eb1"   "Psme2"   
##  [37] "Cxcl9"    "Trim25"   "Gbp4"     "Stat3"    "Ifit1bl1" "Tapbp"   
##  [43] "Psmb9"    "Psme1"    "Pla2g4a"  "Cd86"     "Ube2l6"   "Nampt"   
##  [49] "H2-DMa"   "St3gal5"  "Nfkbia"   "Ifih1"    "Cdkn1a"   "Nfkb1"   
##  [55] "Ccl2"     "Tnfaip2"  "Btg1"     "Zbp1"     "Irf8"     "Psmb8"   
##  [61] "Ptpn2"    "Itgb7"    "Irf1"     "Lcp2"     "Parp14"   "Stat2"   
##  [67] "Mvp"      "Helz2"    "Psmb10"   "Tap1"     "Eif4e3"   "Myd88"   
##  [73] "Csf2rb2"  "Fcgr1"    "Il15"     "Usp18"    "Irf2"     "Vamp8"   
##  [79] "Pml"      "Pnpt1"    "Txnip"    "Gbp8"     "Psmb2"    "Ccl7"    
##  [85] "Ogfr"     "Sp110"    "Irf5"     "Fas"      "Samd9l"   "Socs1"   
##  [91] "Cfh"      "Casp7"    "Cd74"     "Gch1"     "Sri"      "Batf2"   
##  [97] "Ifi30"    "Gbp3"     "Isg15"    "Sppl2a"   "Ifi35"    "Ifi44"   
## [103] "Lap3"     "Ptpn6"    "Fgl2"     "Parp12"   "Trafd1"   "Ifi27"   
## [109] "Ly6e"     "Pde4b"    "Isoc1"    "Ripk2"    "Socs3"    "H2-M3"   
## [115] "Tnfaip6"
leading_edge_c1 <- results_tidy_c1[results_tidy_c1$pathway == "HALLMARK_INTERFERON_GAMMA_RESPONSE",]$leadingEdge[[1]]
leading_edge_c1
##   [1] "H2-Aa"    "H2-Eb1"   "Isg15"    "Cd74"     "Cxcl10"   "Rsad2"   
##   [7] "Ifit3"    "H2-Q7"    "Ccl5"     "Ifit2"    "Fcgr1"    "Fgl2"    
##  [13] "Samhd1"   "Zbp1"     "Ifitm3"   "Ifitm2"   "Sp110"    "Irf7"    
##  [19] "Rnf213"   "Cmpk2"    "Ifi35"    "Ifih1"    "Epsti1"   "Isg20"   
##  [25] "Oas3"     "Casp1"    "Ccl2"     "Trafd1"   "Nmi"      "Il15"    
##  [31] "Gch1"     "Sppl2a"   "Gbp3"     "Psmb8"    "Rtp4"     "Ogfr"    
##  [37] "Irf5"     "Tap1"     "Parp14"   "Ube2l6"   "Ly6e"     "Itgb7"   
##  [43] "Samd9l"   "Xaf1"     "Parp12"   "Psmb10"   "Psmb9"    "Usp18"   
##  [49] "Herc6"    "Dhx58"    "Casp4"    "Socs3"    "Tapbp"    "Casp3"   
##  [55] "Pml"      "Jak2"     "Tnfaip3"  "Casp8"    "Znfx1"    "Irf2"    
##  [61] "Stat2"    "Pfkp"     "Helz2"    "Bst2"     "Nampt"    "Ptpn1"   
##  [67] "Trim25"   "Oasl1"    "Ptgs2"    "Cfb"      "Psme2"    "Psma3"   
##  [73] "Myd88"    "Eif2ak2"  "Pim1"     "Slamf7"   "Sri"      "Stat1"   
##  [79] "Tnfaip2"  "Psma2"    "Oas2"     "Gbp9"     "Klrk1"    "Psme1"   
##  [85] "Mvp"      "Cd86"     "Nlrc5"    "Il10ra"   "Cfh"      "Ifit1bl1"
##  [91] "Nfkb1"    "Pnpt1"    "Il18bp"   "B2m"      "Ccl7"     "Cd38"    
##  [97] "Tdrd7"    "Nfkbia"   "Cd40"     "Lcp2"     "Il4ra"    "Isoc1"   
## [103] "St3gal5"  "Tnfaip6"  "Vcam1"    "Psmb2"    "Gbp8"     "Hif1a"   
## [109] "Cxcl9"    "St8sia4"  "Irf8"     "Eif4e3"   "Trim26"   "Nod1"    
## [115] "Tnfsf10"  "H2-DMa"   "Lgals3bp"
table(leading_edge_c3 %in% leading_edge_c1)
## 
## FALSE  TRUE 
##    30    85
table(leading_edge_c1 %in% leading_edge_c3)
## 
## FALSE  TRUE 
##    32    85
unique_genes <- unique(c(leading_edge_c3, leading_edge_c1))
rownames(markers_c3) <- markers_c3$Gene
rownames(markers_c1) <- markers_c1$Gene
df <- data.frame(Genes = unique_genes, 
                 Rank_c3 = markers_c3[unique_genes,]$`Log2FC (mean)`,
                 Rank_c1 = markers_c1[unique_genes,]$`Log2FC (mean)`)
df
gene_label <- rep("Common", length(unique_genes))
gene_label[df$Genes %in% leading_edge_c3 & !(df$Genes %in% leading_edge_c1)] <- "Cluster 3"
gene_label[df$Genes %in% leading_edge_c1 & !(df$Genes %in% leading_edge_c3)] <- "Cluster 1"
table(gene_label)
## gene_label
## Cluster 1 Cluster 3    Common 
##        32        30        85
df$gene_label <- gene_label
ggplot(df) +
  geom_point( aes(x=Rank_c3, y=Rank_c1, col = gene_label)) +
  
  geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = Genes), size = 5,
                   max.overlaps = 10) + 
  
  scale_color_manual(values=c("red","orange", "grey45"), name = "Leading edge genes")  + 
  ggtitle("Hallmark: Interferon gamma response") +
        xlab("C3 rank") + 
        ylab("C1 rank") +
        theme( text = element_text(size=20)) +
  
  geom_hline(yintercept=0, linetype="dashed", 
                color = "blue", size=0.3) +
  geom_vline(xintercept=0, linetype="dashed", 
                color = "blue", size=0.3) +
  coord_cartesian(ylim = c(-1, 2), xlim = c(-1,5), expand = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 119 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cxcl9 <- rep("", nrow(df))
cxcl9[df$Genes == "Cxcl9"] <- "Cxcl9"
genes <- df$Genes
genes[genes == "Il18bp"] <- ""

ggplot(df) +
  geom_point( aes(x=Rank_c3, y=Rank_c1, col = gene_label), size = 6) +
  
    
  geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = genes), size = 20, max.overlaps = 10) +
  geom_label_repel(aes(x=Rank_c3, y=Rank_c1, label = cxcl9), size = 20, force = 15) +
  
  scale_color_manual(values=c("magenta","red", "grey45"), name = "Leading edge genes")  + 
  ggtitle("Hallmark: Interferon gamma response") +
        xlab("C3 rank") + 
        ylab("C1 rank") +
        theme( text = element_text(size=70)) +
  
  geom_hline(yintercept=0, linetype="dashed", 
                color = "blue", size=0.3) +
  geom_vline(xintercept=0, linetype="dashed", 
                color = "blue", size=0.3) +
  coord_cartesian(ylim = c(-1, 2), xlim = c(-1,2), expand = FALSE)
## Warning: ggrepel: 99 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

write.table(df, "/mnt/nmorais-nfs/marta/pA_karine/r_session/integrated_analysis_revised/paper-figures/S4M-IFN-y.txt", row.names = FALSE, quote = FALSE)
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.1.4        fgsea_1.20.0       msigdbr_7.5.1      ggrepel_0.9.5     
## [5] ggplot2_3.4.2      SeuratObject_4.1.3 Seurat_4.1.1      
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.16            colorspace_2.1-0      deldir_1.0-6         
##   [4] ellipsis_0.3.2        ggridges_0.5.6        rstudioapi_0.15.0    
##   [7] spatstat.data_3.0-0   farver_2.1.1          leiden_0.4.3         
##  [10] listenv_0.9.0         fansi_1.0.6           codetools_0.2-18     
##  [13] splines_4.1.2         cachem_1.0.8          knitr_1.45           
##  [16] polyclip_1.10-4       jsonlite_1.8.8        ica_1.0-3            
##  [19] cluster_2.1.4         png_0.1-8             uwot_0.1.14          
##  [22] shiny_1.8.0           sctransform_0.3.5     spatstat.sparse_3.0-0
##  [25] compiler_4.1.2        httr_1.4.4            Matrix_1.5-3         
##  [28] fastmap_1.1.1         lazyeval_0.2.2        cli_3.6.2            
##  [31] later_1.3.2           htmltools_0.5.7       tools_4.1.2          
##  [34] igraph_1.3.5          gtable_0.3.4          glue_1.7.0           
##  [37] RANN_2.6.1            reshape2_1.4.4        fastmatch_1.1-3      
##  [40] Rcpp_1.0.12           scattermore_0.8       jquerylib_0.1.4      
##  [43] vctrs_0.6.5           babelgene_22.9        nlme_3.1-162         
##  [46] progressr_0.13.0      lmtest_0.9-40         spatstat.random_3.1-3
##  [49] xfun_0.42             stringr_1.5.1         globals_0.16.2       
##  [52] mime_0.12             miniUI_0.1.1.1        lifecycle_1.0.4      
##  [55] irlba_2.3.5.1         goftest_1.2-3         future_1.31.0        
##  [58] MASS_7.3-58.2         zoo_1.8-12            scales_1.3.0         
##  [61] spatstat.core_2.4-4   promises_1.2.1        spatstat.utils_3.0-1 
##  [64] parallel_4.1.2        RColorBrewer_1.1-3    yaml_2.3.8           
##  [67] reticulate_1.26       pbapply_1.7-0         gridExtra_2.3        
##  [70] sass_0.4.8            rpart_4.1.19          stringi_1.8.3        
##  [73] highr_0.10            BiocParallel_1.28.3   rlang_1.1.3          
##  [76] pkgconfig_2.0.3       matrixStats_0.63.0    evaluate_0.23        
##  [79] lattice_0.20-45       ROCR_1.0-11           purrr_1.0.2          
##  [82] tensor_1.5            labeling_0.4.3        patchwork_1.2.0      
##  [85] htmlwidgets_1.6.4     cowplot_1.1.1         tidyselect_1.2.0     
##  [88] parallelly_1.34.0     RcppAnnoy_0.0.20      plyr_1.8.9           
##  [91] magrittr_2.0.3        R6_2.5.1              generics_0.1.3       
##  [94] withr_3.0.0           mgcv_1.8-41           pillar_1.9.0         
##  [97] fitdistrplus_1.1-8    survival_3.5-0        abind_1.4-5          
## [100] sp_1.6-0              tibble_3.2.1          future.apply_1.10.0  
## [103] crayon_1.5.2          KernSmooth_2.23-20    utf8_1.2.4           
## [106] spatstat.geom_3.0-6   plotly_4.10.1         rmarkdown_2.26       
## [109] grid_4.1.2            data.table_1.15.2     digest_0.6.34        
## [112] xtable_1.8-4          tidyr_1.3.1           httpuv_1.6.14        
## [115] munsell_0.5.0         viridisLite_0.4.2     bslib_0.6.1